/*
 * These help in encoding byte-level indices: the 1st byte's top 2 bits tell the
 * (number of bytes - 1) used in the offset-difference encoding; an offset-diff of 0 => new file follows.
 * MaxNum1BPartition is MaxNum8bPartition masked to its low 6 bits (0x3f), i.e. the part of the
 * first byte left over once the top 2 bits are taken by the length tag. Each larger limit is
 * MaxNum1BPartition multiplied by the range of the remaining 1, 2 or 3 bytes.
 * MaxNum8bPartition, MaxNum16bPartition and MaxNum24bPartition are defined elsewhere.
 */
#define MaxNum1BPartition (MaxNum8bPartition & 0x3f) /* 62 (per the original note; depends on MaxNum8bPartition): top byte is 0x00 | x % MaxNum8bPartition === x; just encode x */
#define MaxNum2BPartition (MaxNum1BPartition * MaxNum8bPartition) /* top byte = 0x40 | x / MaxNum8bPartition; rest is x % MaxNum8bPartition; encode both separately */
#define MaxNum3BPartition (MaxNum1BPartition * MaxNum16bPartition) /* top byte = 0x80 | x / MaxNum16bPartition; rest is x % MaxNum16bPartition; encode both separately */
#define MaxNum4BPartition (MaxNum1BPartition * MaxNum24bPartition) /* top byte = 0xc0 | x / MaxNum24bPartition; rest is x % MaxNum24bPartition; encode both separately */
/* Reporting thresholds (counts and percentages) followed by small marker values and a sizing estimate. */
#define MIN_WORDS 50 /* before we inform about numeric words */
#define MAX_SEARCH_PERCENT 20 /* warn user if searching > this % of blocks */
#define DEF_MAX_INDEX_PERCENT 80 /* if word in > 80%, say everywhere for one-file-per-block */
/* Marker values 1..4. NOTE(review): presumably written as delimiter bytes inside index entries -- confirm against the index writer. */
#define DONT_CONFUSE_SORT 1
#define WORD_END_MARK 2
#define ALL_INDEX_MARK 3 /* If this, then word is in > 60% of blocks. NOTE(review): DEF_MAX_INDEX_PERCENT is 80, so the 60% figure may be stale -- confirm */
#define ATTR_END_MARK 4 /* After list of attributes before file offset/block numbers */
#define AVG_WORD_LEN 12 /* average word length is 8-9 including '\0': have safety margin */
/*
 * Buffer and length limits for names, lines, words and patterns.
 * MAX_NAME_LEN, MaxNameLength and MAX_NAME_BUF are all aliases of MAX_NAME_SIZE;
 * MAX_WORD_LEN is an alias of MAX_WORD_SIZE.
 */
#define MAX_NAME_SIZE 256
#define MAX_NAME_LEN MAX_NAME_SIZE
#define MaxNameLength MAX_NAME_SIZE
/* MAX_LINE_SIZE and MAX_LINE_LEN are defined independently (not as aliases): keep the two values in sync. */
#define MAX_LINE_SIZE 1024
#define MAX_LINE_LEN 1024
#define MAX_SORTLINE_LEN (MAX_LINE_LEN * 16) /* Can be ((MaxNum16bPartition*sizeof(int)+MAX_NAME_LEN)*MAX_INDEX_PERCENT/100) in the worst case. NOTE(review): MAX_INDEX_PERCENT is not defined in this part of the file (only DEF_MAX_INDEX_PERCENT is) */
#define MAX_NAME_BUF MAX_NAME_SIZE
#define MAX_WORD_SIZE 64 /* w/o '\0'; was 24 in 2.1 */
#define MAX_WORD_LEN MAX_WORD_SIZE
#define MAX_WORD_BUF 80 /* was 32 in 2.1 */
#define MAX_PAT 256
#define MAXNUM_INDIRECT MaxNum8bPartition
/* Index line buffer sizes; MAX_PARTITION and MaxNum16bPartition are defined elsewhere. */
#define MAX_INDEX_BUF (MAX_PARTITION + 1 + 2*MAX_WORD_BUF + 2) /* index line length without OneFilePerBlock */
#define DEF_REAL_INDEX_BUF (MaxNum16bPartition + 2*MAX_WORD_BUF + 2) /* index line length with OneFilePerBlock */
/*
 * Default sizes of the per-word file set and its bit mask.
 * TODO (from the original note): fresh code must be written to calculate these sizes by
 * multiplying the defaults below with round(file_num, MaxNum16bPartition).
 * DEF_FILEMASK_SIZE is the number of ints needed for one bit per file (8*sizeof(int) bits per int)
 * plus 4 spare; because it uses sizeof it cannot be evaluated in an #if directive.
 */
#define DEF_FILESET_SIZE MaxNum16bPartition /* used when OneFilePerBlock is ON */
#define DEF_FILEMASK_SIZE (DEF_FILESET_SIZE/(8*sizeof(int)) + 4) /* bit mask of files */
#define DEF_REAL_PARTITION (DEF_FILEMASK_SIZE + 4) /* must be > MAX_PARTITION + 1 */
/*
 * Map a block number onto an array of ints used as bit-masks:
 * block2index(i) is the index of the int that holds block i's bit,
 * block2mask(i) is the single-bit mask for block i within that int.
 * block must be in 0..DEF_FILESET_SIZE-1, and integers should represent bit-masks.
 * The argument is parenthesized so that expression arguments such as block2index(a+b)
 * expand correctly, and the mask is built from 1U so that selecting the top bit of the
 * word is not an undefined signed left shift.
 */
#define block2index(i) ((i)/(8*sizeof(int)))
#define block2mask(i) (1U<<((i)%(8*sizeof(int)))) /* not used */
/*
 * round(x, y): integer division of x by y rounded up (for non-negative x and positive y).
 * Both arguments are evaluated more than once (y twice), so avoid arguments with side effects.
 * NOTE(review): this function-like macro has the same name as round() from <math.h>;
 * any call to round(...) after this definition expands to the macro instead.
 */
#define round(x, y) (((x)+(y)-1)/(y))
/* Buffer length for x files spread over MAX_PARTITION partitions (MAX_PARTITION is defined elsewhere). */
#define FILES_PER_PARTITION(x) (16 + round(x, MAX_PARTITION)*4) /* 16 is minimum length of buffer: thereafter, allow noise upto 4 times average */
/*
* Just stores the word, wordlength and offset present in a line of the index in a structure (when made with -o or -b).
* Doesn't store the attribute since we just need a hint into .glimpse_index from where agrep should begin search.
*/
/* Compile-time switch: non-zero selects the word-sorted variant of struct mini, zero the hashed variant. */
#define WORD_SORTED 0

/*
 * One mini-index entry. The word-sorted variant keeps the word in front of
 * the offset; the hashed variant keeps the offset only.
 */
struct mini {
#if WORD_SORTED
	char *word;
#endif /* WORD_SORTED */
	long offset;
};

#if WORD_SORTED
/* Region searched with strcmp. #of regions = mini_array_len = (`wc -l .glimpse_index` - 3) / WORDS_PER_REGION */
#define WORDS_PER_REGION 128
#else /* !WORD_SORTED */
/* Range of each mini_array entry is words with same hash32k value => 32K offsets into the index need to be stored */
#define MINI_ARRAY_LEN (64*1024)
#endif /* WORD_SORTED */
/*
 * For incremental indexing only.
 * Self-referential record linked through `next`; carries a name string,
 * its length, and an integer index.
 */
typedef struct _name_hashelement name_hashelement;

struct _name_hashelement {
	name_hashelement *next;	/* following element, same type */
	char *name;
	int name_len;
	int index;
};
/*
* Limit on number of files is MaxNum24bPartition. To change it, you need
* to add encode/decode code everywhere, INDEX_ELEM_FREE and MAXNUM_INDIRECT.
*
* Limit on number of attributes is MaxNum16bPartition. To change it, you
* need to add encode/decode code everywhere. That is: merge_splits(),
* save_data_structures(), traverse(), merge_in() and scanword()
* in glimpseindex; get_set() in glimpse; and printx.c.
*
* No need to change any other data structures.
*/
/*
 * Names of various system commands used in glimpseindex: use mv/rm etc rather than
 * rename()/unlink() since the former don't return unless the parent-dir is sync-ed.
 * Each macro is the bare command name as a string literal.
 */
#define SYSTEM_SORT "sort" /* replace with different sort with longer lines. Later write a procedure for sort that doesn't need system() */
#define SYSTEM_LS "ls"
#define SYSTEM_MV "mv" /* this doesn't work with SFS */
#define SYSTEM_RM "rm" /* this doesn't work with SFS */